/*
 *  Copyright (C) 2011 Jaime Pavlich-Mariscal
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package cl.ucn.disc.biblio.refcluster.reference;

import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

import cl.ucn.disc.biblio.refcluster.clustering.ClusterManager;
import cl.ucn.disc.biblio.refcluster.database.TupleOfStrings;
import cl.ucn.disc.biblio.refcluster.database.TupleOfStringsMatcher;
import cl.ucn.disc.biblio.refcluster.util.MultiMap;
import ds.tree.DuplicateKeyException;
import ds.tree.RadixTree2;
import ds.tree.RadixTree2Impl;

/** Parses and stores information of "sentences", which are tuples of strings separated by a specific character, usually spaces.
 * @author Jaime Pavlich
 *
 */
public class Sentence implements TupleOfStrings<Sentence> {

    private Comparator<Word> sizeComparator = new Comparator<Word>() {

        @Override
        public int compare(Word w1, Word w2) {
            if (w1.string.length() < w2.string.length()) {
                return -1;
            } else if (w1.string.length() > w2.string.length()) {
                return 1;
            } else {
                return 0;
            }
        }
    };
    @SuppressWarnings("unused")
	private Comparator<Word> textComparator = new Comparator<Word>() {

        @Override
        public int compare(Word w1, Word w2) {
            return w1.string.compareTo(w2.string);
        }
    };
    String string;
    @Override
	public String getString() {
		return string;
	}

    
	/** Keeps the words of this sentence ordered by size. This is useful for the matching algorithm to
	 * cluster references.
	 * @see ClusterManager#clusterReferences(java.util.Collection)
	 * 
	 */
	Queue<Word> words = new PriorityQueue<Word>(10, sizeComparator); 
	
    /** A radix tree used to quickly find superstrings and substrings of a word
     * 
     */
    RadixTree2<String> prefixTree = new RadixTree2Impl<String>();
    
    /** Maps a specific string to the corresponding instances of {@link Word} corresponding to that string.
     * Note that one sentence may have several equal words, which will be mapped from the same key in this multimap. 
     * 
     */
    MultiMap<String, Word> wordsMultiMap = new MultiMap<String, Word>();
    
    
	/** Matches sentences according to their similarity.
	 * @see Sentence#isSimilarTo(Sentence)
	 * 
	 */
	public static TupleOfStringsMatcher<Sentence> SIMILAR_SENTENCE_MATCHER = new TupleOfStringsMatcher<Sentence>() {
	
	    @Override
	    public boolean matches(Sentence s1, Sentence s2) {
	        return s1.isSimilarTo(s2);
	    }
	};

    public Sentence() {
        
    }
    
    /** Constructs a sentence using a string
     * @param string
     */
    public Sentence(String string) {
        parse(string);
    }

    /** Constructs a sentence, using {@code separator} as the character to separate words
     * @param string The string from which the sentence will be constructed
     * @param separator The character that separates words in the sentence
     */
    public Sentence(String string, String separator) {
        parse(string, separator);
    }

    
    @Override
    public void parse(String string) {
        parse(string, "\\s+");
    }

    /** Parses a string and populates the internal structures of {@link Sentence} using that information. 
     * @param string
     * @param separator
     */
    public void parse(String string, String separator) {
        this.string = string;
        String[] ws = separator == null ? new String[]{string} : string.split(separator);
        for (String wordStr : ws) {
            Word word = new Word(wordStr, this);
            words.add(word);
            wordsMultiMap.add(word.string, word);

            try {
                prefixTree.insert(wordStr, wordStr);
            } catch (DuplicateKeyException e) {
                // nothing to do
            }
        }
    }

 

    /**
     * Verifies whether this sentence is similar another sentence. Two sententences
     * are similar if all of the words of each sentence are either a superstring or a
     * substring of a word in the other sentence.
     *
     * @param s2
     * @return
     */
    public boolean isSimilarTo(Sentence s2) {
        if (words.size() == s2.words.size()) {
            Set<Word> matchedWords = new LinkedHashSet<Word>(); // DO NOT USE TreeSet
            Set<Word> matchedWords2 = new LinkedHashSet<Word>(); // DO NOT USE TreeSet
            matchWords(this, s2, matchedWords, matchedWords2);
            matchWords(s2, this, matchedWords2, matchedWords);

            // This Sentence matches s2 only if all of the words in each sentence match at least
            // one word of the other sentence
            return matchedWords.size() == words.size() && matchedWords2.size() == s2.words.size();
        } else {
            return false;
        }
    }

    /** Convenience method used by {@link Sentence#isSimilarTo(Sentence)}.
     * Populates {@code matchedWordsS1} with words of {@code sentence1} that have at least one word of {@code sentence2}
     * that is a superstring; vice versa for {@matchedWordsS2}.
     * @param sentence1
     * @param sentence2
     * @param matchedWordsS1
     * @param matchedWordsS2
     */
    private static void matchWords(Sentence sentence1, Sentence sentence2, Set<Word> matchedWordsS1,  Set<Word> matchedWordsS2) {
        for (Word w : sentence1.words) {
            List<String> superStringsOfW = sentence2.prefixTree.searchPrefix(w.string, Integer.MAX_VALUE);//.getLeafValues(w.string);
            if (!superStringsOfW.isEmpty()) {
                // w becomes a matched word, since there is at least one superstring of w in s2
                matchedWordsS1.add(w);
            }
            for (String str : superStringsOfW) {
                Set<Word> superStringWordsOfS = sentence2.wordsMultiMap.get(str);
                for (Word wordOfS : superStringWordsOfS) {
                    // All of the words that are superstrings of w also become matched words
                    matchedWordsS2.add(wordOfS);
                }
            }
        }
    }

    @Override
    public String toString() {
//		return "Sentence [" + (words != null ? "words=" + words : "") + "]";
        return "\'" + string + "\'";
    }

    @Override
    public int compareTo(Sentence o) {
        return string.compareTo(o.string);
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final Sentence other = (Sentence) obj;
        if ((this.string == null) ? (other.string != null) : !this.string.equals(other.string)) {
            return false;
        }
        return true;
    }

    @Override
    public int hashCode() {
        int hash = 7;
        hash = 83 * hash + (this.string != null ? this.string.hashCode() : 0);
        return hash;
    }

	@Override
	public String getUnprocessedString() {
		return string;
	}


}
